import plotly.offline as pyo
from plotly.graph_objs import *
import chart_studio.plotly as py
import pandas as pd
from pandas import DataFrame
# Initialise Plotly's offline notebook mode before any figure is shown with
# pyo.iplot. The function is called through the package-level name exported
# by plotly.offline rather than by reaching into its inner `offline` submodule.
pyo.init_notebook_mode()
from scipy import stats
# Read the source table; the first CSV column becomes the DataFrame index.
lifeExpectancy = pd.read_csv(r"../Data/LifeExpectancyCigarettePrices.csv", index_col=0)


def _hover_label(row):
    """Return the HTML hover text for a single row of ``lifeExpectancy``.

    Reads the 'Country', 'Sex', 'Years' and 'Most sold cigarette brand (US$)'
    fields of ``row``; the price is cast to float and shown with two decimals.
    """
    template = "<b>{}</b><br>Life expectancy for {}s at 60: {} years<br>Price of cigarettes: ${:.2f}"
    return template.format(
        row['Country'],
        row['Sex'],
        row['Years'],
        float(row['Most sold cigarette brand (US$)']),
    )


# One hover string per row, stored alongside the data so each trace can slice it.
lifeExpectancy['text'] = lifeExpectancy.apply(_hover_label, axis=1)
# Distinct values of the two grouping columns; one trace is later built per
# (sex, region) pair.
regions = [*lifeExpectancy['Region'].unique()]
sexes = [*lifeExpectancy['Sex'].unique()]

# Marker styling keyed by region name (symbol) and by sex (colour).
_symbolByRegion = {
    'Eastern Mediterranean': 'circle',
    'Europe': 'square',
    'Africa': 'diamond',
    'Americas': 'triangle-up',
    'Western Pacific': 'cross',
    'South-East Asia': 'x',
}
_colorBySex = {
    'Male': '#663399',
    'Female': '#FF6347',
}

# Single lookup holding both kinds of entry: region -> {'symbol': ...} and
# sex -> {'color': ...}. A region or sex missing from these tables raises
# KeyError when it is looked up.
markerLookup = {
    **{key: {'symbol': symbol} for key, symbol in _symbolByRegion.items()},
    **{key: {'color': color} for key, color in _colorBySex.items()},
}
def _marker_trace(sex, reg):
    """Return the scatter-trace dict for the rows matching one sex and region.

    x is the cigarette price column, y the 'Years' column, and the hover text
    comes from the precomputed 'text' column. Colour is looked up by sex and
    symbol by region in ``markerLookup``.
    """
    # Select the matching rows once and reuse them for x, y and text.
    rows = lifeExpectancy.loc[(lifeExpectancy['Region'] == reg) & (lifeExpectancy['Sex'] == sex)]
    return {
        'type': 'scatter',
        'mode': 'markers',
        'x': rows['Most sold cigarette brand (US$)'],
        'y': rows['Years'],
        'text': rows['text'],
        # Traces of the same region share a legend group regardless of sex.
        'legendgroup': reg,
        'hoverinfo': 'text',
        'marker': {
            'color': markerLookup[sex]['color'],
            'symbol': markerLookup[reg]['symbol'],
            'opacity': 0.7,
        },
        'name': "{} {}s".format(reg, sex),
    }


# One markers trace per (sex, region) combination, sexes in the outer loop.
traces = []
for sex in sexes:
    for reg in regions:
        traces.append(_marker_trace(sex, reg))
# Figure layout: title, axis titles and ranges, and closest-point hover.
layout = {
    'title': 'Life Expectancy Against Price of Most Popular Brand of Cigarettes (2011)',
    'xaxis': {
        'title': 'Price of most popular brand of cigarettes',
        # Start at zero and leave 5% headroom beyond the highest price.
        'range': [0, lifeExpectancy['Most sold cigarette brand (US$)'].max() * 1.05],
        # Prefix tick labels with a dollar sign. The previous value,
        # tickformat "${:}", is Python str.format syntax rather than a
        # d3-format specifier, so it is not a valid tick format.
        'tickprefix': '$',
    },
    'yaxis': {
        'title': 'Life expectancy at age 60 (years)',
        # Pad the observed range: 10% below the minimum, 5% above the maximum.
        'range': [
            lifeExpectancy['Years'].min() * 0.9,
            lifeExpectancy['Years'].max() * 1.05,
        ],
    },
    'hovermode': 'closest',
}

# Build the figure from the traces collected so far and show it inline.
fig = Figure(data=traces, layout=layout)
pyo.iplot(fig)
# Linear regression of life expectancy ('Years', y) on the cigarette price (x).
prices = lifeExpectancy['Most sold cigarette brand (US$)']
fit = stats.linregress(prices, lifeExpectancy['Years'])
slope, intercept, r_value, p_value, std_err = fit

# Notebook inspection of the fitted coefficients.
slope, intercept
# Recorded output: (0.8372550180526643, 17.198488595559095)

# Notebook inspection of R squared, the p-value and the standard error.
r_value**2, p_value, std_err
# Recorded output: (0.3135512088415398, 1.9844110607238766e-31, 0.06511084122643646)

# Two x positions (zero and the highest price) and the fitted y at each;
# these are the endpoints of the regression line.
xValRange = [0, prices.max()]
line = [slope * xValue + intercept for xValue in xValRange]
line
# Recorded output: [17.198488595559095, 28.333980335659533]
# Add the fitted line as one more trace on top of the marker traces. It is
# excluded from the legend and from hover so only the data points respond.
# NOTE: this appends to the shared `traces` list, so running it again adds a
# second copy of the line.
traces.append({
    'type': 'scatter',
    'mode': 'lines',
    'x': xValRange,
    'y': line,
    # A lines-only trace draws no markers, so the colour is set on `line`
    # (previously it was supplied through `marker.color`).
    'line': {'color': '#333'},
    'hoverinfo': 'none',
    'showlegend': False,
})

# Rebuild and redisplay the figure, now including the line trace.
fig = Figure(data=traces, layout=layout)
pyo.iplot(fig)
# Text of the fitted equation and R squared, each rounded to two decimals,
# with HTML markup for the line break and the superscript.
_equationText = "y = {:.2f}x + {:.2f}<br>R<sup>2</sup> = {:.2f}".format(slope, intercept, r_value**2)

# Annotation placed at data coordinates (10, 28) on the x/y axes, without an arrow.
equationAnnotation = dict(
    text=_equationText,
    xref='x',
    yref='y',
    x=10,
    y=28,
    showarrow=False,
)

# Attach the annotation to the shared layout, then rebuild and show the figure.
layout['annotations'] = [equationAnnotation]
fig = Figure(data=traces, layout=layout)
pyo.iplot(fig)